{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import seldon.pipeline.auto_transforms as auto\n",
    "import pandas as pd\n",
    "import logging\n",
    "logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)\n",
    "\n",
    "df = pd.read_csv(\"http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data\",\n",
    "                 header=None,names=[\"target\",\"Alcohol\",\"Malic acid\",\"Ash\",\"Alcalinity of ash\",\"Magnesium\",\"Total phenols\",\"Flavanoids\",\n",
    "                         \"Nonflavanoid phenols\",\"Proanthocyanins\",\"Color intensity\",\"Hue\",\"OD280/OD315 of diluted wines\",\n",
    "                         \"Proline\"])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>target</th>\n",
       "      <th>Alcohol</th>\n",
       "      <th>Malic acid</th>\n",
       "      <th>Ash</th>\n",
       "      <th>Alcalinity of ash</th>\n",
       "      <th>Magnesium</th>\n",
       "      <th>Total phenols</th>\n",
       "      <th>Flavanoids</th>\n",
       "      <th>Nonflavanoid phenols</th>\n",
       "      <th>Proanthocyanins</th>\n",
       "      <th>Color intensity</th>\n",
       "      <th>Hue</th>\n",
       "      <th>OD280/OD315 of diluted wines</th>\n",
       "      <th>Proline</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>14.23</td>\n",
       "      <td>1.71</td>\n",
       "      <td>2.43</td>\n",
       "      <td>15.6</td>\n",
       "      <td>127</td>\n",
       "      <td>2.80</td>\n",
       "      <td>3.06</td>\n",
       "      <td>0.28</td>\n",
       "      <td>2.29</td>\n",
       "      <td>5.64</td>\n",
       "      <td>1.04</td>\n",
       "      <td>3.92</td>\n",
       "      <td>1065</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>13.20</td>\n",
       "      <td>1.78</td>\n",
       "      <td>2.14</td>\n",
       "      <td>11.2</td>\n",
       "      <td>100</td>\n",
       "      <td>2.65</td>\n",
       "      <td>2.76</td>\n",
       "      <td>0.26</td>\n",
       "      <td>1.28</td>\n",
       "      <td>4.38</td>\n",
       "      <td>1.05</td>\n",
       "      <td>3.40</td>\n",
       "      <td>1050</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>13.16</td>\n",
       "      <td>2.36</td>\n",
       "      <td>2.67</td>\n",
       "      <td>18.6</td>\n",
       "      <td>101</td>\n",
       "      <td>2.80</td>\n",
       "      <td>3.24</td>\n",
       "      <td>0.30</td>\n",
       "      <td>2.81</td>\n",
       "      <td>5.68</td>\n",
       "      <td>1.03</td>\n",
       "      <td>3.17</td>\n",
       "      <td>1185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>14.37</td>\n",
       "      <td>1.95</td>\n",
       "      <td>2.50</td>\n",
       "      <td>16.8</td>\n",
       "      <td>113</td>\n",
       "      <td>3.85</td>\n",
       "      <td>3.49</td>\n",
       "      <td>0.24</td>\n",
       "      <td>2.18</td>\n",
       "      <td>7.80</td>\n",
       "      <td>0.86</td>\n",
       "      <td>3.45</td>\n",
       "      <td>1480</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>13.24</td>\n",
       "      <td>2.59</td>\n",
       "      <td>2.87</td>\n",
       "      <td>21.0</td>\n",
       "      <td>118</td>\n",
       "      <td>2.80</td>\n",
       "      <td>2.69</td>\n",
       "      <td>0.39</td>\n",
       "      <td>1.82</td>\n",
       "      <td>4.32</td>\n",
       "      <td>1.04</td>\n",
       "      <td>2.93</td>\n",
       "      <td>735</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   target  Alcohol  Malic acid   Ash  Alcalinity of ash  Magnesium  \\\n",
       "0       1    14.23        1.71  2.43               15.6        127   \n",
       "1       1    13.20        1.78  2.14               11.2        100   \n",
       "2       1    13.16        2.36  2.67               18.6        101   \n",
       "3       1    14.37        1.95  2.50               16.8        113   \n",
       "4       1    13.24        2.59  2.87               21.0        118   \n",
       "\n",
       "   Total phenols  Flavanoids  Nonflavanoid phenols  Proanthocyanins  \\\n",
       "0           2.80        3.06                  0.28             2.29   \n",
       "1           2.65        2.76                  0.26             1.28   \n",
       "2           2.80        3.24                  0.30             2.81   \n",
       "3           3.85        3.49                  0.24             2.18   \n",
       "4           2.80        2.69                  0.39             1.82   \n",
       "\n",
       "   Color intensity   Hue  OD280/OD315 of diluted wines  Proline  \n",
       "0             5.64  1.04                          3.92     1065  \n",
       "1             4.38  1.05                          3.40     1050  \n",
       "2             5.68  1.03                          3.17     1185  \n",
       "3             7.80  0.86                          3.45     1480  \n",
       "4             4.32  1.04                          2.93      735  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create an auto transform to scale numeric columns automatically."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>target</th>\n",
       "      <th>Alcohol</th>\n",
       "      <th>Malic acid</th>\n",
       "      <th>Ash</th>\n",
       "      <th>Alcalinity of ash</th>\n",
       "      <th>Magnesium</th>\n",
       "      <th>Total phenols</th>\n",
       "      <th>Flavanoids</th>\n",
       "      <th>Nonflavanoid phenols</th>\n",
       "      <th>Proanthocyanins</th>\n",
       "      <th>Color intensity</th>\n",
       "      <th>Hue</th>\n",
       "      <th>OD280/OD315 of diluted wines</th>\n",
       "      <th>Proline</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>1.518613</td>\n",
       "      <td>-0.562250</td>\n",
       "      <td>0.232053</td>\n",
       "      <td>-1.169593</td>\n",
       "      <td>1.913905</td>\n",
       "      <td>0.808997</td>\n",
       "      <td>1.034819</td>\n",
       "      <td>-0.659563</td>\n",
       "      <td>1.224884</td>\n",
       "      <td>0.251717</td>\n",
       "      <td>0.362177</td>\n",
       "      <td>1.847920</td>\n",
       "      <td>1.013009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>0.246290</td>\n",
       "      <td>-0.499413</td>\n",
       "      <td>-0.827996</td>\n",
       "      <td>-2.490847</td>\n",
       "      <td>0.018145</td>\n",
       "      <td>0.568648</td>\n",
       "      <td>0.733629</td>\n",
       "      <td>-0.820719</td>\n",
       "      <td>-0.544721</td>\n",
       "      <td>-0.293321</td>\n",
       "      <td>0.406051</td>\n",
       "      <td>1.113449</td>\n",
       "      <td>0.965242</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0.196879</td>\n",
       "      <td>0.021231</td>\n",
       "      <td>1.109334</td>\n",
       "      <td>-0.268738</td>\n",
       "      <td>0.088358</td>\n",
       "      <td>0.808997</td>\n",
       "      <td>1.215533</td>\n",
       "      <td>-0.498407</td>\n",
       "      <td>2.135968</td>\n",
       "      <td>0.269020</td>\n",
       "      <td>0.318304</td>\n",
       "      <td>0.788587</td>\n",
       "      <td>1.395148</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>1.691550</td>\n",
       "      <td>-0.346811</td>\n",
       "      <td>0.487926</td>\n",
       "      <td>-0.809251</td>\n",
       "      <td>0.930918</td>\n",
       "      <td>2.491446</td>\n",
       "      <td>1.466525</td>\n",
       "      <td>-0.981875</td>\n",
       "      <td>1.032155</td>\n",
       "      <td>1.186068</td>\n",
       "      <td>-0.427544</td>\n",
       "      <td>1.184071</td>\n",
       "      <td>2.334574</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>0.295700</td>\n",
       "      <td>0.227694</td>\n",
       "      <td>1.840403</td>\n",
       "      <td>0.451946</td>\n",
       "      <td>1.281985</td>\n",
       "      <td>0.808997</td>\n",
       "      <td>0.663351</td>\n",
       "      <td>0.226796</td>\n",
       "      <td>0.401404</td>\n",
       "      <td>-0.319276</td>\n",
       "      <td>0.362177</td>\n",
       "      <td>0.449601</td>\n",
       "      <td>-0.037874</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   target   Alcohol  Malic acid       Ash  Alcalinity of ash  Magnesium  \\\n",
       "0       0  1.518613   -0.562250  0.232053          -1.169593   1.913905   \n",
       "1       0  0.246290   -0.499413 -0.827996          -2.490847   0.018145   \n",
       "2       0  0.196879    0.021231  1.109334          -0.268738   0.088358   \n",
       "3       0  1.691550   -0.346811  0.487926          -0.809251   0.930918   \n",
       "4       0  0.295700    0.227694  1.840403           0.451946   1.281985   \n",
       "\n",
       "   Total phenols  Flavanoids  Nonflavanoid phenols  Proanthocyanins  \\\n",
       "0       0.808997    1.034819             -0.659563         1.224884   \n",
       "1       0.568648    0.733629             -0.820719        -0.544721   \n",
       "2       0.808997    1.215533             -0.498407         2.135968   \n",
       "3       2.491446    1.466525             -0.981875         1.032155   \n",
       "4       0.808997    0.663351              0.226796         0.401404   \n",
       "\n",
       "   Color intensity       Hue  OD280/OD315 of diluted wines   Proline  \n",
       "0         0.251717  0.362177                      1.847920  1.013009  \n",
       "1        -0.293321  0.406051                      1.113449  0.965242  \n",
       "2         0.269020  0.318304                      0.788587  1.395148  \n",
       "3         1.186068 -0.427544                      1.184071  2.334574  \n",
       "4        -0.319276  0.362177                      0.449601 -0.037874  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"target\"] = df[\"target\"] - 1\n",
    "t_auto = auto.Auto_transform(exclude=[\"target\"])\n",
    "df2 = t_auto.fit_transform(df)\n",
    "df2.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create an XGBoost classifier and run 5-fold cross validation on the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/clive/tools/scikit-learn/sklearn/cross_validation.py:42: DeprecationWarning: This module has been deprecated in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "Seldon_KFold(clf=XGBoostClassifier(base_score=0.5,\n",
       "         clf=XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,\n",
       "       gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,\n",
       "       min_child_weight=1, missing=None, n_estimators=100, nthread=-1,\n",
       "       objective='multi:softprob', reg_al...torizer=DictVectorizer(dtype=<type 'numpy.float64'>, separator='=', sort=True,\n",
       "        sparse=True)),\n",
       "       k=5)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from seldon import xgb\n",
    "import seldon.pipeline.cross_validation as cf\n",
    "xgb = xgb.XGBoostClassifier(target=\"target\")\n",
    "cv = cf.Seldon_KFold(xgb,5)\n",
    "cv.fit(df2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Average accuracy  0.96619047619\n"
     ]
    }
   ],
   "source": [
    "print \"Average accuracy \",cv.get_score()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
